*------------------------------------------------------------------------------
* Clean Wave 1 in-home data
*==============================================================================

* Biological sex (bio_sex): female is 1 when bio_sex equals 2, 0 for any
* other non-missing value, and missing when bio_sex is missing.
generate female = bio_sex == 2 if !missing(bio_sex)

* Grade: show the distribution of h1gi20, then keep plain copies of the two
* grade items (h1gi20 and s3 -- presumably the in-home and in-school
* reports; confirm against the codebook).
tabulate h1gi20
generate grade_h = h1gi20
generate grade_s = s3

* Age : interview year/month - birth year/month
* Code 96 in the birth month/year items is a non-response value (it is
* recoded to missing just below), so it must not enter the subtraction;
* rows carrying it are left missing in age_y / age_ym.
gen age_y = (iyear - h1gi1y) if h1gi1y != 96
gen age_ym = 12*(iyear - h1gi1y) + (imonth - h1gi1m) if h1gi1y != 96 & h1gi1m != 96

* Cleaned birth month/year with the 96 code set to missing.
recode h1gi1m (96=.), gen (w1bmonth)
recode h1gi1y (96=.), gen (w1byear)

* Date-based age: the day of birth is not used, so the 15th of the month
* stands in for it; years are two-digit, hence the 1900 offset.
gen w1bdate = mdy(w1bmonth, 15, 1900 + w1byear)
format w1bdate %td
gen w1idate = mdy(imonth, iday, 1900 + iyear)
format w1idate %td

* Completed years between birth date and interview date.
gen w1age = int((w1idate - w1bdate)/365.25)

* Race
* One indicator per h1gi6 item (a-e): 1 when the item equals 1, 0 for any
* other non-missing value, missing when the item is missing.
local race_names white black indian asian other
local item_tags  a b c d e
forvalues k = 1/5 {
	local nm  : word `k' of `race_names'
	local tag : word `k' of `item_tags'
	generate `nm' = h1gi6`tag' == 1 if !missing(h1gi6`tag')
}
generate hispanic = h1gi4 == 1 if !missing(h1gi4)

* Number of race indicators equal to 1 (rowtotal counts missing as 0),
* tabulated alone and against h1gi8 including missing values.
egen multirace = rowtotal(white black indian asian other)
tabulate multirace
tabulate multirace h1gi8, missing

* Race : self-reported
* Respondents with exactly one indicator set get that category's code
* (1 white, 2 black, 3 indian, 4 asian, 5 other); respondents with more
* than one indicator set take the value of h1gi8 instead. Rows with no
* indicator set stay missing.
generate race_s = .
local code = 0
foreach grp in white black indian asian other {
	local ++code
	replace race_s = `code' if `grp' == 1 & multirace == 1
}
replace race_s = h1gi8 if multirace > 1

* Race : interviewer's assessment
generate race_i = h1gi9

* Race / hispanic over-writing
* Codes 3, 4 and 5 of race_s collapse into 4 (1 and 2 are kept as they
* are); hispanic respondents are then set to 3 whatever their race code.
recode race_s (3 4 5 = 4), gen(race_h)
replace race_h = 3 if hispanic == 1

* Demography 2009; Dohoon Lee
* Race-ethnic category: nine
* parent report : pa4 - pa5_1 pa5_2 pa5_3 pa5_4 pa5_5 pa5_6 pa5_7
* pa6_1 pa6_2 pa6_3 pa6_4 pa6_5
* Parent race indicators from pa6_1 ... pa6_5: 1 when the item equals 1,
* 0 for any other non-missing value, missing when the item is missing.
local k = 0
foreach grp in white black indian asian other {
	local ++k
	generate p_`grp' = pa6_`k' == 1 if !missing(pa6_`k')
}

* Number of parent race indicators equal to 1, cross-tabulated with pa8b.
egen p_multirace = rowtotal(p_white p_black p_indian p_asian p_other)
tabulate p_multirace pa8b, missing

* Single-category parents take that category's code (1-5, same order as
* above); parents with more than one category take the value of pa8b.
generate p_race_s = .
local k = 0
foreach grp in white black indian asian other {
	local ++k
	replace p_race_s = `k' if p_multirace == 1 & p_`grp' == 1
}
replace p_race_s = pa8b if p_multirace > 1

* interviewer's assessment
generate p_race_i = pa9

* immigration status
* first generation : foreign-born to foreign-born parents
* second generation : US born to foreign-born parents
* third or higher : US born to US born parents
* child's report in in-home
gen usborn = h1gi11
* NOTE(review): rows with h1gi3 == 0 are forced to US born; confirm the
* meaning of h1gi3 == 0 against the codebook.
replace usborn = 1 if h1gi3 == 0

* Age at arrival in years and in months. The cleaned birth year/month
* (w1byear, w1bmonth: code 96 already recoded to missing) are used so that
* the non-response code cannot enter the subtraction as if it were a date.
* NOTE(review): h1gi13y / h1gi13m are used as they come; confirm whether
* they carry non-response codes that also need recoding.
gen uscome_y = h1gi13y - w1byear
gen uscome_ym = (h1gi13y - w1byear)*12 + h1gi13m - w1bmonth

* 1 = not US born and not known to have arrived before age 6; 0 otherwise.
* A missing uscome_y never satisfies "< 6", so respondents who are not
* US born and have an unknown arrival age stay coded 1.
gen uscome_6older = (usborn==0) if ~missing(usborn)
replace uscome_6older = 0 if uscome_y < 6

* Parents' nativity as reported by the child. Judging by the variable
* names, h1nm6 / h1nf6 refer to the biological mother / father and
* h1rm2 / h1rf2 to the resident mother / father.
generate bioma_usborn = h1nm6
generate biofa_usborn = h1nf6
generate resma_usborn = h1rm2
generate resfa_usborn = h1rf2

* Use the bio* value when present, otherwise fall back to the res* value.
generate ma_usborn = cond(missing(bioma_usborn), resma_usborn, bioma_usborn)
generate fa_usborn = cond(missing(biofa_usborn), resfa_usborn, biofa_usborn)

* Generation status from the child's report:
*   1 = usborn is 0
*   2 = usborn is 1, both parent values are 0/1 and at least one is 0
*   3 = usborn is 1 and both parent values are 1
* Any other combination (including missing pieces) stays missing.
generate immig_1 = .
replace immig_1 = 1 if usborn == 0
replace immig_1 = 2 if usborn == 1 & inlist(ma_usborn, 0, 1) & inlist(fa_usborn, 0, 1) & (ma_usborn == 0 | fa_usborn == 0)
replace immig_1 = 3 if usborn == 1 & ma_usborn == 1 & fa_usborn == 1

* public assistance
generate ma_pub_ass = h1rm9
generate fa_pub_ass = h1rf9
* Family-level flag: 1 when either parent item is 1; otherwise 0 when
* either parent item is 0; missing when neither holds.
generate family_pub_ass = cond(ma_pub_ass == 1 | fa_pub_ass == 1, 1, cond(ma_pub_ass == 0 | fa_pub_ass == 0, 0, .))

* Family Income
generate family_income = pa55

* Family Structure; http://www.cpc.unc.edu/projects/addhealth/data/constructed-variables/family-structure-codebook
* Household roster -- h1hr3a, h1hr3b, .. h1hr3t
* father or mother? -- then h1hr6*
* Copy every h1hr3* item to hrel1, hrel2, ... in varlist order.
local slot = 0
foreach item of varlist h1hr3* {
	local ++slot
	generate hrel`slot' = `item'
}

* Count roster members per relationship code: 11 father, 12 father's wife,
* 13 father's partner, 14 mother, 15 mother's husband, 16 mother's partner.
local roles father father_wife father_partner mother mother_husband mother_partner
local code = 10
foreach role of local roles {
	local ++code
	egen n_`role' = anycount(hrel*), values(`code')
}

* Counts of 1 or 2 become a presence flag of 1.
recode n_father n_mother (1/2 = 1)

* Copy every h1hr6* item to prel1, prel2, ... in varlist order.
local slot = 0
foreach item of varlist h1hr6* {
	local ++slot
	generate prel`slot' = `item'
}

* Count roster members per h1hr6 code: 1 biological father, 3 adoptive
* father, 7 biological mother, 9 adoptive mother.
egen n_biofather = anycount(prel*), values(1)
egen n_adoptfather = anycount(prel*), values(3)

egen n_biomother = anycount(prel*), values(7)
egen n_adoptmother = anycount(prel*), values(9)

* Inspect the raw counts before collapsing 1 or 2 into a flag of 1.
tabulate n_biofather n_biomother
recode n_biofather n_biomother (1/2 = 1)

* With no father on the roster, the mother's-husband count stands in for
* it; the non-biological father count is that value, zeroed whenever a
* biological father is present.
replace n_father = n_mother_husband if n_father == 0
generate n_nonbiofather = cond(n_biofather == 1, 0, n_father)

* Same construction on the mother side, using the father's-wife count.
replace n_mother = n_father_wife if n_mother == 0
generate n_nonbiomother = cond(n_biomother == 1, 0, n_mother)

* Fourteen-category family structure. Rules are applied in order; rules
* without a "famst14 == ." guard overwrite an earlier assignment.
gen famst14 = .
* 1: biological mother and biological father (the second term must test
*    the father; testing the mother twice coded every biological-mother
*    household as 1 unless a later rule happened to overwrite it).
replace famst14 = 1 if n_biomother == 1 & n_biofather == 1
* 2: biological mother with a non-biological father
replace famst14 = 2 if n_biomother == 1 & n_nonbiofather == 1
* 3: non-biological mother with a biological father
replace famst14 = 3 if n_nonbiomother == 1 & n_biofather == 1

* 4 / 5: biological mother (father) with no father (mother), no adoptive
*        father (mother) and no partner
replace famst14 = 4 if n_biomother == 1 & (n_father == 0 & n_adoptfather == 0 & n_mother_partner ==0)
replace famst14 = 5 if n_biofather == 1 & (n_mother == 0 & n_adoptmother == 0 & n_father_partner ==0)
* 6 / 7: as 4 / 5 but with exactly one partner of the biological parent
replace famst14 = 6 if n_biomother == 1 & (n_father == 0 & n_adoptfather == 0 & n_mother_partner ==1)
replace famst14 = 7 if n_biofather == 1 & (n_mother == 0 & n_adoptmother == 0 & n_father_partner ==1)
* 8: adoptive mother and adoptive father (only if still unassigned)
replace famst14 = 8 if n_adoptmother == 1 & n_adoptfather == 1 & famst14 == .

* 9: non-biological mother, or father's partner, with no father
replace famst14 = 9 if n_nonbiomother == 1 & (n_father == 0) & famst14 ==.
replace famst14 = 9 if n_father_partner == 1 & (n_father == 0) & famst14 == .

* 10: non-biological father, or mother's partner, with no mother
replace famst14 = 10 if n_nonbiofather == 1 & (n_mother == 0) & famst14 == .
replace famst14 = 10 if n_mother_partner == 1 & (n_mother == 0) & famst14 == .

* 11: non-biological mother and non-biological father
replace famst14 = 11 if n_nonbiomother == 1 & n_nonbiofather == 1 & famst14 == .

* 12 / 13: non-biological mother (father) with exactly one partner and no
*          father (mother); unguarded, so these overwrite codes set above
replace famst14 = 12 if n_nonbiomother == 1 & (n_father == 0 & n_adoptfather == 0 & n_mother_partner ==1)
replace famst14 = 13 if n_nonbiofather == 1 & (n_mother == 0 & n_adoptmother == 0 & n_father_partner ==1)
* 14: everything not classified above
replace famst14 = 14 if famst14 == .

* eight categories
recode famst14 (1=1) (2 6=2) (3 7=3) (8=4) (11 12 13=5) (4 9 = 6) (5 10=7) (14=8), gen(famst8)

* seven categories (code 5 of famst8 folded into 4; other codes unchanged)
recode famst8 (4 5 = 4), gen(famst7)

* five categories (codes 2, 3, 4 of famst7 folded into 2; others unchanged)
recode famst7 (2 3 4 = 2), gen(famst5)

* Human capital
* parent's human capital
** parent's highest educational attainment
* no high school diploma, high school graduate(+GED), some college, college graduate

* child's report
* Copy the four education items; any kind of missing value becomes ".".
local edu_src h1nm4 h1nf4 h1rm1 h1rf1
local edu_new bioma_educ biofa_educ resma_educ resfa_educ
forvalues k = 1/4 {
	local src : word `k' of `edu_src'
	local new : word `k' of `edu_new'
	generate `new' = `src' if !missing(`src')
}

* Map the category codes onto a numeric scale (presumably approximate
* years of schooling -- confirm); codes 11 and 12 are set to missing.
recode bioma_educ biofa_educ resma_educ resfa_educ (1=8) (2=11) (3 4 5=12) (6=13) (7=14) (8=16) (9=18) (10=0) (11/12=.)

* parent's reporting
* p_parents is 1 when pc1 lies in 1-4 or 9-12, 0 for any other non-missing
* value of pc1, and missing when pc1 is missing.
generate p_parents = (inrange(pc1, 1, 4) | inrange(pc1, 9, 12)) if !missing(pc1)

* parent's report on immigration status
* Defined only where p_parents is 1 and pa3 is non-missing.
generate p_usborn = pa3 == 1 if !missing(pa3) & p_parents == 1
*tab p_usborn if missing(immig_1)

* Start from the child-based status and fill the remaining gaps for
* US-born respondents from the parent's own report: 2 when the parent is
* not US born, 3 when the parent is US born.
generate immig_2 = immig_1
replace immig_2 = cond(p_usborn == 0, 2, 3) if missing(immig_2) & usborn == 1 & inlist(p_usborn, 0, 1)

* Maternal education : reported by parent survey
* pa12 when pc1 is below 5; pb8 when pc1 is between 9 and 12 and pb2 is 2.
* A missing pc1 satisfies neither comparison.
generate p_maeduc = pa12 if pc1 < 5
replace p_maeduc = pb8 if pc1 > 8 & pc1 < 13 & pb2 == 2

* Codes 11 and 12 become missing; the rest map onto the same numeric scale
* as the child-reported education variables.
recode p_maeduc (1=8) (2=11) (3 4 5=12) (6=13) (7=14) (8=16) (9=18) (10=0) (11 12=.)

* Father's education : reported by parent survey
* pa12 when pc1 is between 9 and 12; pb8 when pc1 is below 5 and pb2 is 1.
generate p_faeduc = pa12 if inrange(pc1, 9, 12)
replace p_faeduc = pb8 if pc1 < 5 & pb2 == 1

recode p_faeduc (1=8) (2=11) (3 4 5=12) (6=13) (7=14) (8=16) (9=18) (10=0) (11 12=.)

* Combine Parent and Respondent Report
* The parent's report wins; the child's report on the resident parent
* fills in where the parent's report is missing.
generate c_maeduc = cond(missing(p_maeduc), resma_educ, p_maeduc)

generate c_faeduc = cond(missing(p_faeduc), resfa_educ, p_faeduc)

* Higher of the two combined values (rowmax ignores a missing one).
egen c_paeduc_max = rowmax(c_maeduc c_faeduc)


* parental attachment : student reports
generate pa_att_closema = h1wp9
generate pa_att_carema = h1wp10

generate pa_att_closefa = h1wp13
generate pa_att_carefa = h1wp14

* Per-parent mean of the two items.
foreach who in ma fa {
	egen `who'_attach = rowmean(pa_att_close`who' pa_att_care`who')
}

* Per-parent sum of the two items.
* NOTE(review): rowtotal() counts a missing item as 0, so a respondent
* with both items missing gets a sum of 0 rather than missing, and that 0
* feeds the min/max/mean below -- confirm this is intended.
foreach who in ma fa {
	egen `who'_attachs = rowtotal(pa_att_close`who' pa_att_care`who')
}

* Across-parent mean, max and min, first for the means (pa_attach_*) and
* then for the sums (pa_attachs_*).
foreach base in attach attachs {
	foreach stat in mean max min {
		local tag = cond("`stat'" == "mean", "avg", "`stat'")
		egen pa_`base'_`tag' = row`stat'(ma_`base' fa_`base')
	}
}

* School attachment
* Items are coded 1 : strongly agree - 5 : strongly disagree, so 6 - item
* reverses them and a higher score means more agreement.
*   s_att1 <- s62b : feel close to people at school
*   s_att2 <- s62e : feel like part of school
*   s_att3 <- s62i : happy to be at school
local n = 0
foreach q in b e i {
	local ++n
	generate s_att`n' = 6 - s62`q'
}

* Reliability of the s_att items; the scale score is saved as s_sattachment.
alpha s_att*, generate(s_sattachment)

* Parent-child relationships : care
*s16: How much do you think she cares about you? (mother) 1 : not at all - 5 : very much
*s22: How much do you think he cares about you? (father) 1 : not at all - 5 : very much
alpha s16 s22, generate(s_pattachment)

* Family - Sibling relationships
* sibship size, plus a version with every value of 5 or more set to 5
generate sibsize = h1hr14
recode sibsize (5/max = 5), generate(sibsize5)
label variable sibsize5 "Sibship Size (W1) : cap=5"

* PVT test score
* pvt_std copies ah_pvt; m_pvt flags rows where that score is missing.
generate pvt_std = ah_pvt
generate m_pvt = missing(pvt_std)

generate pvt_raw = ah_raw

* Depressive Symptoms (depression)
* Copies of the h1fs items; the *_rev items are reverse-scored as 3 - item.
* NOTE(review): this assumes the items are on a 0-3 scale with any
* non-response codes already removed -- confirm against the codebook.
gen dep_bother = h1fs1
gen dep_eating = h1fs2
gen dep_blues = h1fs3
gen dep_good_rev = 3-h1fs4
gen dep_keepmind = h1fs5
gen dep_depress = h1fs6
gen dep_tired = h1fs7
gen dep_hope_rev = 3-h1fs8
gen dep_failure = h1fs9
gen dep_fearful = h1fs10
gen dep_happy_rev = 3-h1fs11

gen dep_enjoy_rev = 3-h1fs15
gen dep_sad = h1fs16
gen dep_start = h1fs18
gen dep_life = h1fs19

sum dep_*
alpha dep_*

* cesd: sum of the 15 items defined so far.
egen cesd = rowtotal(dep_*)

* rowtotal() counts a missing item as 0, so blank the score when any of
* its items is missing. This is done here, before the four extra items
* exist, so that dep_* covers exactly the items that make up cesd; running
* it later would also blank cesd for items that are not part of it.
foreach item of varlist dep_* {
	replace cesd = . if missing(`item')
}

* Four additional items used only by the full score.
gen dep_talkless = h1fs12 
gen dep_lonley = h1fs13 
gen dep_unfriend = h1fs14 
gen dep_dislike = h1fs17 

* fcesd: sum of all 19 items, missing when any of the 19 is missing.
egen fcesd = rowtotal(dep_*) 

foreach item of varlist dep_* {
	replace fcesd = . if missing(`item')
}

* region
* rural is 1 when region equals 1, 0 for any other non-missing value.
generate region = h1ir12
generate rural = region == 1 if !missing(region)

* Drop the block of variables from h1gi1m through smp08 (dataset order).
drop h1gi1m-smp08

* Suffix every remaining variable with _1, leaving the aid variable as is.
foreach v of varlist * {
	if "`v'" == "aid" continue
	rename `v' `v'_1
}


